{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sever.data_loader import make_mask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "409600"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pixels per image: 256 x 1600, the mask height and width used further down.\n",
    "img_size = 256 * 1600\n",
    "img_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ImageId_ClassId</th>\n",
       "      <th>EncodedPixels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0002cc93b.jpg_1</td>\n",
       "      <td>29102 12 29346 24 29602 24 29858 24 30114 24 3...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0002cc93b.jpg_2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0002cc93b.jpg_3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0002cc93b.jpg_4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>00031f466.jpg_1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ImageId_ClassId                                      EncodedPixels\n",
       "0  0002cc93b.jpg_1  29102 12 29346 24 29602 24 29858 24 30114 24 3...\n",
       "1  0002cc93b.jpg_2                                                NaN\n",
       "2  0002cc93b.jpg_3                                                NaN\n",
       "3  0002cc93b.jpg_4                                                NaN\n",
       "4  00031f466.jpg_1                                                NaN"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Raw training labels: one row per ImageId_ClassId with its EncodedPixels\n",
    "# string (NaN when that class has no mask). Path is relative to this notebook.\n",
    "train_csv = '../data/raw/severstal-steel-defect-detection/train.csv'\n",
    "df = pd.read_csv(train_csv)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Four defect classes. The RLE columns are named rle0..rle3 (0-based),\n",
    "# whereas the ClassId suffix in the CSV runs 1..4.\n",
    "N_CLASSES = 4\n",
    "rle_cols = [f'rle{i}' for i in range(N_CLASSES)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rle0</th>\n",
       "      <th>rle1</th>\n",
       "      <th>rle2</th>\n",
       "      <th>rle3</th>\n",
       "      <th>defects</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0002cc93b.jpg</td>\n",
       "      <td>29102 12 29346 24 29602 24 29858 24 30114 24 3...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0007a71bf.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18661 28 18863 82 19091 110 19347 110 19603 11...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>000a4bcdd.jpg</td>\n",
       "      <td>37607 3 37858 8 38108 14 38359 20 38610 25 388...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>000f6bf48.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131973 1 132228 4 132483 6 132738 8 132993 11 ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0014fce06.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>229501 11 229741 33 229981 55 230221 77 230468...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                            rle0 rle1  \\\n",
       "ImageId                                                                 \n",
       "0002cc93b.jpg  29102 12 29346 24 29602 24 29858 24 30114 24 3...  NaN   \n",
       "0007a71bf.jpg                                                NaN  NaN   \n",
       "000a4bcdd.jpg  37607 3 37858 8 38108 14 38359 20 38610 25 388...  NaN   \n",
       "000f6bf48.jpg                                                NaN  NaN   \n",
       "0014fce06.jpg                                                NaN  NaN   \n",
       "\n",
       "                                                            rle2  \\\n",
       "ImageId                                                            \n",
       "0002cc93b.jpg                                                NaN   \n",
       "0007a71bf.jpg  18661 28 18863 82 19091 110 19347 110 19603 11...   \n",
       "000a4bcdd.jpg                                                NaN   \n",
       "000f6bf48.jpg                                                NaN   \n",
       "0014fce06.jpg  229501 11 229741 33 229981 55 230221 77 230468...   \n",
       "\n",
       "                                                            rle3  defects  \n",
       "ImageId                                                                    \n",
       "0002cc93b.jpg                                                NaN        1  \n",
       "0007a71bf.jpg                                                NaN        1  \n",
       "000a4bcdd.jpg                                                NaN        1  \n",
       "000f6bf48.jpg  131973 1 132228 4 132483 6 132738 8 132993 11 ...        1  \n",
       "0014fce06.jpg                                                NaN        1  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split 'ImageId_ClassId' on its LAST underscore only, so an image name that\n",
    "# itself contains '_' still yields exactly (ImageId, ClassId).\n",
    "id_parts = df['ImageId_ClassId'].str.rsplit('_', n=1, expand=True)\n",
    "df['ImageId'] = id_parts[0]\n",
    "df['ClassId'] = id_parts[1].astype(int)\n",
    "# One row per image, one column per class holding that class's RLE string.\n",
    "df = df.pivot(index='ImageId', columns='ClassId', values='EncodedPixels')\n",
    "df.columns = rle_cols\n",
    "# Number of classes with a non-NaN RLE, counted over the RLE columns only.\n",
    "df['defects'] = df[rle_cols].count(axis=1)\n",
    "# Keep only the images that have at least one defect mask.\n",
    "df = df.loc[df.defects > 0, :]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['0002cc93b.jpg', '0007a71bf.jpg', '000a4bcdd.jpg'], dtype=object)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Take a COPY of the labels: df.index.values can expose the index's own\n",
    "# buffer, and reordering that buffer in place would silently relabel every\n",
    "# row of df.\n",
    "filenames = df.index.values.copy()\n",
    "filenames[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['374493292.jpg', '8fe172794.jpg', '24c9c291d.jpg'], dtype=object)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seeded, out-of-place shuffle: permutation() returns a new array instead of\n",
    "# reordering its argument, and the fixed seed makes the order reproducible.\n",
    "filenames = np.random.RandomState(0).permutation(filenames)\n",
    "filenames[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rle0</th>\n",
       "      <th>rle1</th>\n",
       "      <th>rle2</th>\n",
       "      <th>rle3</th>\n",
       "      <th>defects</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0002cc93b.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>373611 89 373826 185 374041 226 374276 247 374...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0007a71bf.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84 28 318 56 553 83 787 111 1032 125 1288 125 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>000a4bcdd.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52349 132 52605 132 52861 132 53117 132 53373 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>000f6bf48.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>139523 16 139779 48 140035 80 140291 111 14054...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0014fce06.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>222167 9 222415 19 222670 22 222926 25 223181 ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              rle0 rle1                                               rle2  \\\n",
       "0002cc93b.jpg  NaN  NaN  373611 89 373826 185 374041 226 374276 247 374...   \n",
       "0007a71bf.jpg  NaN  NaN  84 28 318 56 553 83 787 111 1032 125 1288 125 ...   \n",
       "000a4bcdd.jpg  NaN  NaN  52349 132 52605 132 52861 132 53117 132 53373 ...   \n",
       "000f6bf48.jpg  NaN  NaN  139523 16 139779 48 140035 80 140291 111 14054...   \n",
       "0014fce06.jpg  NaN  NaN  222167 9 222415 19 222670 22 222926 25 223181 ...   \n",
       "\n",
       "              rle3  defects  \n",
       "0002cc93b.jpg  NaN        1  \n",
       "0007a71bf.jpg  NaN        1  \n",
       "000a4bcdd.jpg  NaN        1  \n",
       "000f6bf48.jpg  NaN        1  \n",
       "0014fce06.jpg  NaN        1  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Relabel a copy of df with the filenames array and order rows by the new labels.\n",
    "# NOTE(review): the assignment is positional (row i gets filenames[i]) and does\n",
    "# not move any data -- confirm that relabelling, not reordering, is intended.\n",
    "df2 = df.copy()\n",
    "df2.index = filenames\n",
    "df2 = df2.sort_index()\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "class RLE:\n",
    "    \"\"\"\n",
    "    Run-length encoding of a binary mask as (start, length) pairs.\n",
    "\n",
    "    Starts are 1-based positions in the column-major flattening of a\n",
    "    MASK_H x MASK_W mask. from_mask emits that convention and to_mask\n",
    "    consumes it, so the two are exact inverses for masks of that shape.\n",
    "    \"\"\"\n",
    "\n",
    "    MASK_H = 256\n",
    "    MASK_W = 1600\n",
    "\n",
    "    @classmethod\n",
    "    def from_str(cls, s):\n",
    "        \"\"\"\n",
    "        Parse 'start length start length ...' into an RLE.\n",
    "\n",
    "        NaN, None and blank strings all yield an empty RLE, so\n",
    "        RLE.from_str(str(rle)) round-trips even when rle is empty.\n",
    "        \"\"\"\n",
    "        if s is None or s != s:  # NaN is the only value unequal to itself\n",
    "            return cls()\n",
    "        return cls.from_list([int(n) for n in s.split()])\n",
    "\n",
    "    @classmethod\n",
    "    def from_mask(cls, mask):\n",
    "        \"\"\"\n",
    "        Encode a 2-D mask; every non-zero pixel counts as foreground.\n",
    "\n",
    "        Pixels are read column by column (hence the transpose) and the\n",
    "        reported starts are 1-based.\n",
    "        \"\"\"\n",
    "        pixels = np.asarray(mask).T.flatten() != 0\n",
    "        # Zero-pad both ends so runs touching the first or last pixel still\n",
    "        # produce a start and an end transition.\n",
    "        padded = np.concatenate([[0], pixels, [0]])\n",
    "        runs = np.where(padded[1:] != padded[:-1])[0] + 1\n",
    "        # Even slots hold starts, odd slots hold ends; turn ends into lengths.\n",
    "        runs[1::2] -= runs[::2]\n",
    "        return cls.from_list(runs)\n",
    "\n",
    "    @classmethod\n",
    "    def from_list(cls, list_):\n",
    "        \"\"\"\n",
    "        Build an RLE from a flat [start, length, start, length, ...] sequence.\n",
    "\n",
    "        The values are copied into an (n, 2) uint64 array; a trailing\n",
    "        unpaired value is ignored.\n",
    "        \"\"\"\n",
    "        flat = np.array(list_, dtype=np.uint64).ravel()\n",
    "        n_items = flat.size // 2\n",
    "        return cls(flat[:n_items * 2].reshape(n_items, 2))\n",
    "\n",
    "    def __init__(self, items=None):\n",
    "        \"\"\"\n",
    "        Wrap an (n, 2) array of (start, length) rows.\n",
    "\n",
    "        With no argument a fresh empty (0, 2) array is created per instance,\n",
    "        so empty RLEs never share one default array.\n",
    "        \"\"\"\n",
    "        if items is None:\n",
    "            items = np.zeros((0, 2), dtype=np.uint64)\n",
    "        self._items = items\n",
    "\n",
    "    @property\n",
    "    def items(self):\n",
    "        \"\"\"The underlying (n, 2) array of (start, length) rows.\"\"\"\n",
    "        return self._items\n",
    "\n",
    "    def __iter__(self):\n",
    "        \"\"\"Yield one (start, length) tuple per run.\"\"\"\n",
    "        for item in self.items:\n",
    "            yield (item[0], item[1])  # run, length\n",
    "\n",
    "    def __len__(self):\n",
    "        \"\"\"Number of runs.\"\"\"\n",
    "        return self.items.shape[0]\n",
    "\n",
    "    def to_mask(self):\n",
    "        \"\"\"Decode into a (MASK_H, MASK_W) uint8 mask of 0s and 1s.\"\"\"\n",
    "        mask = np.zeros(self.MASK_H * self.MASK_W, dtype=np.uint8)\n",
    "        for run, length in self:\n",
    "            # Starts are 1-based, array offsets are 0-based. Plain ints keep\n",
    "            # the subtraction clear of unsigned wrap-around.\n",
    "            first = int(run) - 1\n",
    "            stop = first + int(length)\n",
    "            # Clamp at 0 so a malformed start of 0 cannot turn into a\n",
    "            # negative, wrap-around slice bound.\n",
    "            mask[max(first, 0):max(stop, 0)] = 1\n",
    "        return mask.reshape(self.MASK_H, self.MASK_W, order='F')\n",
    "\n",
    "    def to_str_list(self):\n",
    "        \"\"\"Flat list of decimal strings: [start, length, start, length, ...].\"\"\"\n",
    "        list_ = []\n",
    "        for run, length in self:\n",
    "            list_.append(str(run))\n",
    "            list_.append(str(length))\n",
    "        return list_\n",
    "\n",
    "    def __str__(self):\n",
    "        \"\"\"Space-separated 'start length ...' text; '' for an empty RLE.\"\"\"\n",
    "        if len(self) == 0:\n",
    "            return ''\n",
    "        return ' '.join(self.to_str_list())\n",
    "\n",
    "    def __repr__(self):\n",
    "        \"\"\"Same text as str(self).\"\"\"\n",
    "        return str(self)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "rle0                                                  NaN\n",
       "rle1                                                  NaN\n",
       "rle2    84 28 318 56 553 83 787 111 1032 125 1288 125 ...\n",
       "rle3                                                  NaN\n",
       "Name: 0007a71bf.jpg, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# RLE strings of one image from df, selected with a single .loc call.\n",
    "r1 = df.loc['0007a71bf.jpg', rle_cols]\n",
    "r1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "rle0                                                  NaN\n",
       "rle1                                                  NaN\n",
       "rle2    84 28 318 56 553 83 787 111 1032 125 1288 125 ...\n",
       "rle3                                                  NaN\n",
       "Name: 0007a71bf.jpg, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The same label looked up in the relabelled frame df2.\n",
    "r2 = df2.loc['0007a71bf.jpg', rle_cols]\n",
    "r2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x7f4671140240>"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAABYCAYAAAAOTbepAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAKZ0lEQVR4nO3dbYxcVR3H8e/PLa0UbGh5sk+xxVRjTUyBBkGMMaK2IqG8MakRrRHDG0zAh2hrExNfkIgaNMaoaQCDijSkoDQkpjxIYkwUKVigpSwsFOzSQiGKNJLwoH9f3LNh2O52Z6b3zj33zu+TbHbm3Jmd350993/PnLkzVxGBmZm1y9vqDmBmZuVzcTczayEXdzOzFnJxNzNrIRd3M7MWcnE3M2uhyoq7pLWSRiWNSdpY1eOYmdmRVMVx7pJGgMeBTwDjwP3AZyPi0dIfzMzMjlDVyP0cYCwinoqI14CtwLqKHsvMzCaZVdHfXQzs77g+Dnyw8waSLgcuBxhh5Oy5zKsoiplZOx3mXy9GxKlTLauquGuKtrfM/0TEFmALwDwtiJcOnlxRlOZas2hV3RHMLGN3x7ZnpltW1bTMOLC04/oS4EBFj9VaOw7sqjuCmTVUVcX9fmCFpOWSZgPrge0VPVarucCbWT8qmZaJiDckfQXYAYwAN0TEnioeaxjsOLDLUzR21B29+4dNVsmhkL3ynHt3vAEPr35ewbm/tN/dse2BiFg91TJ/QtXMrIVc3BvE8+9m1i0Xd7OW8mBguGVR3N/zgVfqjtAInkM1s25lUdzNzKxcLu5mmfP0ivWjqq8fsJJ5Ssb60YbPSPj4/v64uJtZVnp5pdJ5Wxf6t3JxbwB3WitT7gVxzaJVfU1F5b5eg+bibjbE2loQJ+8c2rRu3XJxN2u5bufdcyqI/Y7ep9PWndjRuLhnblg6ouWn7oJYdoGfUPd6DYqLe4ba3OGsmdpaEHvZeTRtvV3cB6RpHcNsOoOcvqlq9N6PY8lRx/bv4l6Cphbu6TprU9fHZlb1NAe4/0xlpue9iufMxf0YtLUTt+GDL1afiUJWVh/KafRelSoGWi7uPSqrw5a9AZQt93xmw+BYXhW5uHehygI31R57EAW125GQR/H1avKItcy+Mwyj92708py6uE9j0AV9uuU5FFaP4i0HLvCFbgu8vxVykjWLVrmImZXExbga3TyvWRT3xx+eW+vjTxT0YSnq3uBskMrsb8OyjZYh62kZ/yPzmvPOKYv1pw1TG519sOnrcix2HNjFyMLpl2dT3IelaNTdGet+fOtNFcV4YlsbZF+oamAw1d90Hy9kMS1jZu03qKI7LNOsM61fNiN3awZPzViTDPPI3sW9AVxQrS1y6Mu5Ffx+3kPo5jl0cTezoVdXwZ/8uGXu+FzczWygchi9d2Nyxqre2K6Ki7v1rCkbp1mZyhzdD2L7cXFvCBdUs/z0evrCQW7DLu4DVPeX/Q/LUQJmOalrUObj3K0v3lGY5c3F3friKaLmyO3QPxuMGYu7pKWS7pW0V9IeSVem9gWS7pL0RPo9v+M+mySNSRqVtKbKFTAzsyN1M3J/A/h6RLwPOBe4QtJKYCNwT0SsAO5J10nL1gPvB9YCP5M0UkV4q4dH7Wb5m7G4R8TBiHgwXT4M7AUWA+uAG9PNbgQuSZfXAVsj4tWI2AeMAeeUHXyYuJha23haqHo9zblLWgacCdwHnB4RB6HYAQCnpZstBvZ33G08tU3+W5dL2ilp5+u82ntyq4V3NGbN0HVxl3QicCtwVUS8fLSbTtEWRzREbImI1RGx+jjmdBvDzMy60FVxl3QcRWG/KSJuS83PS1qYli8EDqX2cWBpx92XAAfKiWt18qjdrDm6OVpGwPXA3oi4tmPRdmBDurwBuL2jfb2kOZKWAyuAv5UX2czMZtLNJ1TPBz4PPCJp4l2QbwPfA26RdBnwD+AzABGxR9ItwKMU
R9pcERH/LT25DZRH7WbNMmNxj4g/M/U8OsAF09znauDqY8hlSZlFtQ3nz7TeeKc8vPwJVZuRC4RZ87i4mw2hunfYdT/+MHBxH6AcOnSvGXLIbO3iPjUYLu5mQ2rQRXbNolUu7APk4j5gvXTwujeEuh/fCm0oik3P30Q+WUdNjtbZqz6ixUfNNFNnn2nS/8+FvR6KOOKbAQYfQjoMjNadowunAC/WHaILzlku5yxXE3I2ISPAuyLi1KkW5DJyH42I1XWHmImknc5ZHucsl3OWpwkZZ+I5dzOzFnJxNzNroVyK+5a6A3TJOcvlnOVyzvI0IeNRZfGGqpmZlSuXkbuZmZXIxd3MrIVqL+6S1koalTQmaWONOZZKulfSXkl7JF2Z2hdIukvSE+n3/I77bEq5RyWtGXDeEUl/l3RHrjklnSRpm6TH0vN6XqY5v5r+57sl3Szp7TnklHSDpEOSdne09ZxL0tmSHknLfpJOwFN1zh+k//vDkn4n6aQcc3Ys+4akkHRK3TlLExG1/QAjwJPAGcBs4CFgZU1ZFgJnpcvvAB4HVgLfBzam9o3ANenyypR3DrA8rcfIAPN+DfgtcEe6nl1O4Ebgy+nybOCk3HJSnLx9H3B8un4L8MUccgIfAc4Cdne09ZyL4kxo51Gcl+EPwKcGkPOTwKx0+Zpcc6b2pcAO4BnglLpzlvVT98j9HGAsIp6KiNeArcC6OoJExMGIeDBdPgzspdjw11EUKdLvS9LldcDWiHg1IvYBYxTrUzlJS4BPA9d1NGeVU9I8io3peoCIeC0iXsotZzILOF7SLGAuxTl/a88ZEX8C/jmpuadcKs5vPC8i/hJFZfpVx30qyxkRd0bEG+nqXynOpZxdzuRHwDeBzqNLastZlrqL+2Jgf8f18dRWK0nLgDOB+4DTI+IgFDsA4LR0szqz/5iiM/6voy23nGcALwC/TNNH10k6IbecEfEs8EOKU0UeBP4dEXfmlrNDr7kWp8uT2wfpSxQjXMgsp6SLgWcj4qFJi7LK2Y+6i/tUc1W1Hpsp6UTgVuCqiHj5aDedoq3y7JIuAg5FxAPd3mWKtkE8x7MoXgL/PCLOBP5DMY0wnbqez/kUo7TlwCLgBEmXHu0uU7TlcDzxdLlqzStpM8W5lG+aaJomz8BzSpoLbAa+M9XiafLk+v8/Qt3FfZxivmvCEoqXxLWQdBxFYb8pIm5Lzc+nl2Kk34dSe13ZzwculvQ0xTTWxyT9JsOc48B4RNyXrm+jKPa55fw4sC8iXoiI14HbgA9lmHNCr7nGeXNKpLO9cpI2ABcBn0tTGLnlfDfFTv2htD0tAR6U9M7Mcval7uJ+P7BC0nJJs4H1wPY6gqR3vK8H9kbEtR2LtgMb0uUNwO0d7eslzZG0HFhB8UZLpSJiU0QsiYhlFM/XHyPi0gxzPgfsl/Te1HQB8GhuOSmmY86VNDf1gQso3m/JLeeEnnKlqZvDks5N6/eFjvtURtJa4FvAxRHxyqT8WeSMiEci4rSIWJa2p3GKgyqeyyln3+p+Rxe4kOLIlCeBzTXm+DDFy6uHgV3p50LgZOAe4In0e0HHfTan3KPU8I458FHePFomu5zAKmBnek5/D8zPNOd3gceA3cCvKY6QqD0ncDPF+wCvUxSey/rJBaxO6/Yk8FPSJ9MrzjlGMWc9sS39Iseck5Y/TTpaps6cZf346wfMzFqo7mkZMzOrgIu7mVkLubibmbWQi7uZWQu5uJuZtZCLu5lZC7m4m5m10P8BAOOa1g3gXBUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Decode the RLE strings of r1 and keep channel 2 (the rle2 column).\n",
    "# NOTE(review): on a fresh top-to-bottom run this calls the make_mask imported\n",
    "# from sever.data_loader; the notebook's own make_mask is defined in a later cell.\n",
    "m1 = make_mask(r1)[:, :, 2]\n",
    "plt.imshow(m1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([   987,     31,   1070, ...,     43, 409069,    113])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-encode the decoded mask back into run-length form.\n",
    "RLE.from_mask(m1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_mask(labels):\n",
    "    '''Decode per-class RLE strings into a (256, 1600, 4) float32 mask stack.\n",
    "\n",
    "    labels: a pandas Series (anything exposing .values) of up to four entries,\n",
    "        each either an RLE string 'start length start length ...' or NaN.\n",
    "        Entry i fills channel i; NaN leaves that channel all zeros.\n",
    "    Returns the mask stack only. RLE starts are 1-based positions in the\n",
    "    column-major (top-to-bottom, then left-to-right) flattening of the image.\n",
    "    '''\n",
    "    masks = np.zeros((256, 1600, 4), dtype=np.float32)  # float32 is V.Imp (original author's note)\n",
    "\n",
    "    for idx, label in enumerate(labels.values):\n",
    "        if label == label:  # NaN check: NaN is never equal to itself\n",
    "            tokens = label.split()\n",
    "            starts = map(int, tokens[0::2])\n",
    "            lengths = map(int, tokens[1::2])\n",
    "            mask = np.zeros(256 * 1600, dtype=np.uint8)\n",
    "            for start, length in zip(starts, lengths):\n",
    "                # Starts are 1-based, array offsets are 0-based. Without the\n",
    "                # shift every run lands one pixel late and a run ending on\n",
    "                # the last pixel loses that pixel.\n",
    "                first = start - 1\n",
    "                stop = first + length\n",
    "                # Clamp at 0 so a malformed start of 0 cannot wrap around.\n",
    "                mask[max(first, 0):max(stop, 0)] = 1\n",
    "            # order='F' undoes the column-major flattening.\n",
    "            masks[:, :, idx] = mask.reshape(256, 1600, order='F')\n",
    "    return masks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.74 ms ± 657 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Baseline: parse and decode the RLE strings of one image with make_mask.\n",
    "make_mask(r1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One RLE object per entry of r1 (NaN entries become empty RLEs).\n",
    "rles = list(map(RLE.from_str, r1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.84 ms ± 19.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Decode the pre-parsed RLE objects. NOTE(review): string parsing happened\n",
    "# earlier in from_str, so this is not a like-for-like match with make_mask(r1).\n",
    "[rle.to_mask() for rle in rles]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([], shape=(0, 0), dtype=float64)"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect what the RLE built from the first entry of r1 holds.\n",
    "rles[0].items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): df_empty is not assigned in any cell visible in this notebook;\n",
    "# on a fresh kernel this line raises NameError. Define it or delete the cell.\n",
    "df_empty.empty"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the name suggests images with no defects, but the filter keeps\n",
    "# rows with defects > 0 -- confirm which was intended.\n",
    "df_nodef = df.loc[df.defects > 0, :]\n",
    "df_nodef.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
